{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 采用5折交叉验证，分别用log似然损失和正确率，对Logistic回归模型的正则超参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pregnants</th>\n",
       "      <th>Plasma_glucose_concentration</th>\n",
       "      <th>blood_pressure</th>\n",
       "      <th>Triceps_skin_fold_thickness</th>\n",
       "      <th>serum_insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>Diabetes_pedigree_function</th>\n",
       "      <th>Age</th>\n",
       "      <th>Target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.639947</td>\n",
       "      <td>0.866045</td>\n",
       "      <td>-0.031990</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>0.166619</td>\n",
       "      <td>0.468492</td>\n",
       "      <td>1.425995</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.205066</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-0.852200</td>\n",
       "      <td>-0.365061</td>\n",
       "      <td>-0.190672</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1.233880</td>\n",
       "      <td>2.016662</td>\n",
       "      <td>-0.693761</td>\n",
       "      <td>-0.012301</td>\n",
       "      <td>-0.181541</td>\n",
       "      <td>-1.332500</td>\n",
       "      <td>0.604397</td>\n",
       "      <td>-0.105584</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>-0.844885</td>\n",
       "      <td>-1.073567</td>\n",
       "      <td>-0.528319</td>\n",
       "      <td>-0.695245</td>\n",
       "      <td>-0.540642</td>\n",
       "      <td>-0.633881</td>\n",
       "      <td>-0.920763</td>\n",
       "      <td>-1.041549</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-1.141852</td>\n",
       "      <td>0.504422</td>\n",
       "      <td>-2.679076</td>\n",
       "      <td>0.670643</td>\n",
       "      <td>0.316566</td>\n",
       "      <td>1.549303</td>\n",
       "      <td>5.484909</td>\n",
       "      <td>-0.020496</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pregnants  Plasma_glucose_concentration  blood_pressure  \\\n",
       "0   0.639947                      0.866045       -0.031990   \n",
       "1  -0.844885                     -1.205066       -0.528319   \n",
       "2   1.233880                      2.016662       -0.693761   \n",
       "3  -0.844885                     -1.073567       -0.528319   \n",
       "4  -1.141852                      0.504422       -2.679076   \n",
       "\n",
       "   Triceps_skin_fold_thickness  serum_insulin       BMI  \\\n",
       "0                     0.670643      -0.181541  0.166619   \n",
       "1                    -0.012301      -0.181541 -0.852200   \n",
       "2                    -0.012301      -0.181541 -1.332500   \n",
       "3                    -0.695245      -0.540642 -0.633881   \n",
       "4                     0.670643       0.316566  1.549303   \n",
       "\n",
       "   Diabetes_pedigree_function       Age  Target  \n",
       "0                    0.468492  1.425995       1  \n",
       "1                   -0.365061 -0.190672       0  \n",
       "2                    0.604397 -0.105584       1  \n",
       "3                   -0.920763 -1.041549       0  \n",
       "4                    5.484909 -0.020496       1  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#input data\n",
    "train = pd.read_csv(\"FE_pima-indians-diabetes.csv\")\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(768, 9)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_train = train['Target']   \n",
    "X_train = train.drop([\"Target\"], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.sparse import csr_matrix\n",
    "X_train = csr_matrix(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "lr = LogisticRegression()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of each fold is:  [0.48797856 0.53011593 0.4562292  0.422546   0.48392885]\n",
      "cv logloss is: 0.47615970944434044\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "# 交叉验证用于评估模型性能和进行参数调优（模型选择）\n",
    "#分类任务中交叉验证缺省是采用StratifiedKFold\n",
    "#数据集比较大，采用3折交叉验证\n",
    "from sklearn.model_selection import cross_val_score\n",
    "loss = cross_val_score(lr, X_train, y_train, cv=5, scoring='neg_log_loss') #accuracy\n",
    "#loss = cross_val_score(lr, X_train, y_train, cv=5, scoring='accuracy') #accuracy正确率\n",
    "#%timeit loss_sparse = cross_val_score(lr, X_train_sparse, y_train, cv=3, scoring='neg_log_loss')\n",
    "print ('logloss of each fold is: ',-loss)\n",
    "print ('cv logloss is:', -(loss.mean()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### logistic回归的需要调整超参数有：C（正则系数，一般在log域（取log后的值）均匀设置候选参数）和正则函数penalty（L2/L1） 目标函数为：J = C* sum(logloss(f(xi), yi)) + penalty\n",
    "\n",
    "在sklearn框架下，不同学习器的参数调整步骤相同：\n",
    "1.设置参数搜索范围\n",
    "2.生成学习器实例（参数设置）\n",
    "3.生成GridSearchCV的实例（参数设置）\n",
    "4.调用GridSearchCV的fit方法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### log似然损失，5折交叉验证 进行参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=100, multi_class='warn',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=None, solver='liblinear',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='warn', n_jobs=4,\n",
       "             param_grid={'C': [0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "             scoring='neg_log_loss', verbose=0)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "penaltys = ['l1','l2']\n",
    "Cs = [ 0.1, 1, 10, 100, 1000]\n",
    "tuned_parameters = dict(penalty = penaltys, C = Cs)\n",
    "\n",
    "lr_penalty= LogisticRegression(solver='liblinear')\n",
    "grid= GridSearchCV(lr_penalty, tuned_parameters,cv=5, scoring='neg_log_loss',n_jobs = 4,return_train_score='warn')\n",
    "grid.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最小log损失： 0.47602855341117056\n",
      "最佳超参数： {'C': 1, 'penalty': 'l1'}\n"
     ]
    }
   ],
   "source": [
    "print(\"最小log损失：\", -grid.best_score_)\n",
    "print(\"最佳超参数：\", grid.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 准确率，5折交叉验证 进行参数调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=100, multi_class='warn',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=None, solver='liblinear',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='warn', n_jobs=4,\n",
       "             param_grid={'C': [0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
       "             scoring='accuracy', verbose=0)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lr_penalty_accuracy= LogisticRegression(solver='liblinear')\n",
    "grid_accuracy= GridSearchCV(lr_penalty_accuracy, tuned_parameters,cv=5, scoring='accuracy',n_jobs = 4,return_train_score='warn')\n",
    "grid_accuracy.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "最佳准确率： 0.7747395833333334\n",
      "最佳超参数： {'C': 0.1, 'penalty': 'l2'}\n"
     ]
    }
   ],
   "source": [
    "print(\"最佳准确率：\", grid_accuracy.best_score_)\n",
    "print(\"最佳超参数：\", grid_accuracy.best_params_) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_C', 'param_penalty', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score', 'split0_train_score', 'split1_train_score', 'split2_train_score', 'split3_train_score', 'split4_train_score', 'mean_train_score', 'std_train_score'])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid.cv_results_.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEGCAYAAABy53LJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3deXgV9dn/8fd9kkAEgiiLshpAqIIiS0REUFurRaAgimCLWttaWwtaax+f2lptq/1p1cfWUlFB3K37BgKWpy4I+AgSFNlRoC5hEUQFFVlz//6YSXoIJ8kZyOSE5PO6rnPlzMx35twzWT6Zme/MmLsjIiKSrkSmCxARkQOLgkNERCJRcIiISCQKDhERiUTBISIikWRnuoDq0KxZM8/Pz890GSIiB5T58+d/4u7Ny46vE8GRn59PYWFhpssQETmgmNkHqcbrUJWIiESi4BARkUgUHCIiEkmdOMchInXXzp07KSoqYtu2bZkupcbKzc2lTZs25OTkpNVewSEitVpRURF5eXnk5+djZpkup8ZxdzZt2kRRURHt27dPax4dqhKRWm3btm00bdpUoVEOM6Np06aR9sgUHCJS6yk0KhZ1+yg4RETKGDn+DUaOfyPTZdRYCo4K6IdHRKpCo0aNSt8PGDCAJk2aMHjw4JRtR48eTffu3enSpQsHHXQQ3bt3p3v37jz99NNpf95zzz3Hrbfeut91l0cnx0VEqtFVV13F1q1bGT9+fMrp48aNA+D9999n8ODBLFiwIGW7Xbt2kZ2d+k/4sGHDqqbYcmiPQ0SkGp122mnk5eXt07z9+vXjmmuu4eSTT+aOO+5g0qRJnHDCCfTo0YMzzjiDDRs2ADBx4kSuuOIKAM4//3x+8Ytf0LdvXzp06MBzzz233+ugPQ4RqTP++MISlq7dUmm7peuCNukcqu7SqjG//27X/a4tXVu2bGHmzJkAfPbZZwwZMgQz4+677+a2227j5ptv3mueDRs28Prrr7No0SJGjBix33skCg4RkQPIeeedV/r+ww8/ZMSIEaxfv57t27fTuXPnlPOcddZZmBndunVjzZo1+12DgkNE6ox09wxK9jSe+OmJcZazTxo2bFj6fvTo0fz2t79l4MCBvPTSS/z5z39OOU/9+vVL37v7ftegcxwiIgeozZs307p1a9ydBx98sNo+V8EhIlKN+vfvz7nnnsvLL79MmzZtmD59+j4v6w9/+APDhg3jlFNO4bDDDqvCKiumQ1UiIjH78ssvS9/PmjUrrXny8/NZvHjxHuNmz569x/A555zDOeecs9e8F198cen7Rx55pNxa9pWCQ0SkjJp4bqMm0aEqERGJRMEhIiKRKDhERCQSBYeIiESi4BARKev+QcFLUlJwiIjErOS26gsWLODEE0+ka9eudOvWjSeeeGKvtrqtuoiIlGrQoAEPPfQQnTp1Yu3atfTq1YvvfOc7NGnSpLTNgXBbdQWHiEg1Sb4JYatWrWjRogUbN27cIzgq0q9fP0455RRmzZrF2WefTfv27bnxxhvZsWMHzZs355FHHqFFixZMnDiRxYsXc/vtt3P++efTtGlT5s2bx/r167ntttt0d1wRkbS9eDWsX1R5u/ULg6/pnOc4/Fg4M/XNBSvy5ptvsmPHDjp27BhpPt1WXUSkDlq3bh0XXHABDz74IIlEtFPNuq26iEh1SnfPoGRP44dTq7yELVu2MGjQIP70pz/Rp0+fyPPrtuoiInXIjh07GDZsGBdeeCHnnnvufi9Pt1UXEanlnnzySWbOnMkDDzxQ2s22vF5T6cjUbdWtKnZbarqCggIvLCyMPF9NfgqYiKRn2bJlHH300dFmivFQVU2VajuZ2Xx3LyjbNtY9DjMbYGYrzGylmV1dQbvhZuZmVhAOjzKzBUmvYjPrHk6bES6zZFqLONdBROqgH06tU6ERVWwnx80sCxgHnA4UAfPMbLK7Ly3TLg+4HJhbMs7d/wH8I5x+LDDJ3ZP350a5e/RdCBER2W9x7nH0Bla6+2p33wE8DgxN0e4G4BZgWznL+R7wWDwlikhdUBcOye+PqNsnzuBoDXyUNFwUjitlZj2Atu4+pYLljGTv4Lg/PEx1rZlZqpnM7BIzKzSzwo0bN+5D+SJSG+Tm5rJp0yaFRzncnU2bNpGbm5v2PHFex5HqD3rpd87MEsBfgYvKXYDZCcBWd09+8O4od18THuJ6BrgAeGivD3KfAEyA4OT4vqyAiBz42rRpQ1FREfoHsny5ubm0adMm7fZxBkcR0DZpuA2wNmk4DzgGmBHuNBwOTDazIUnnL86jzN6Gu68Jv35hZo8SHBLbKzhERABycnJo3759psuoVeI8VDUP6GRm7c2sHkEITC6Z6O6b3b2Zu+e7ez4wBygNjXCP5FyCcyOE47LNrFn4PgcYDCTvjYiISMxi2+Nw911mNgaYDmQB97n7EjO7Hih098kVL4GTgSJ3X500rj4wPQyNLOAl4J4YyhcRkXLEeq8qd58GTCsz7rpy2p5aZngG0KfMuK+AXlVapIiIRKJbjoiISCQKDhERiUTBISIikSg4REQkEgWHiIhEoicAVuC6TVeF72ZntA4RkZpEexwiIhKJgkNERCJRcIiISCQKDhERiUTBISIikSg4REQkEgWHiIhEouAQEZFIFBwiIhKJgkNERCJRcIiISCQKDhERiUTBISIikSg4REQkEgWHiIhEouAQEZFIFBwiIhKJgkNERCJRcIiISCQKDhERiUTBIVVm5Pg3GDn+jUyXISIxy850AVJ7XLfpqvDd7IzWISLx0h6HSIYsubEfS27sl+kyDhjaXtHEub0UHCIiEomCQ0REIlFwiIhIJLEGh5kNMLMVZrbSzK6uoN1wM3MzKwiHR5nZgqRXsZl1D6f1MrNF4TLHmpnFuQ4iIrKn2ILDzLKAccCZQBfge2bWJUW7POByYG7JOHf/h7t3d/fuwAXA++6+IJx8F3AJ0Cl8DYhrHUREZG9x7nH0Bla6+2p33wE8DgxN0e4G4BZgWznL+R7wGICZtQQau/sb7u7AQ8BZVV65iIiUK87gaA18lDRcFI4rZWY9gLbuPqWC5YwkDI5w/qKKllmVlu1qzafFDeNavIjIASnOCwBTnXvw0olmCeCvwEXlLsDsBGCruy9OZ5ll5r2E4JAW7dq1S6/iJDt3F3PL10PYSTZ3rd5Enw5NIy9DRKQ2inOPowhomzTcBlibNJwHHAPMMLP3gT7A5JIT5KHz+M/eRsky21SwzFLuPsHdC9y9oHnz5pGLz8lK8PsGT9HAtjNq4lzGv7aK4OiYiEjdFmdwzAM6mVl7M6tHEAKTSya6+2Z3b+bu+e6eD8wBhrh7IZTukZxLcG6kZJ51wBdm1ifsTXUhMCmuFcjP+oTbGz7AGV0O46YXl/PTh+ezZdvOuD5OROSAEFtwuPsuYAwwHVgGPOnuS8zsejMbksYiTgaK3H11mfGXAhOBlcAq4MUqLHsvDWwHd47qye8GHc3Lyzcw5O+zWbp2S5wfKSJSo8V6k0N3nwZMKzPuunLanlpmeAbB4auy7QoJDnFVGzPj4v4dOK5tE0b/4y2G3fk6fzrrGM4taFv5zCIitYyuHI/g+PxDmXp5f3q2O4Srnl7Ib55dyLaduzNdlohItVJwRNQ8rz4P/7g3l57akcfe/Ijhd/8fH326NdNliYhUGwXHPsjOSvDrAUdxz4UFfLBpK4PGzuKV5R9nuiwRkWqh4NgPp3c5jCmX9aPNIQ340QOF3Dp9ObuL1WVXRGo3Bcd+OqJpQ579eV9GFrRl3KuruPC+uXzy5fZMlyUiEhsFRxXIzcni5uHduGV4Nwrf/4zBY2cz/4NPM12WiEgsFBxVaERBW579eV/qZScYOX4O987+t642F5FaR8FRxbq2OpgXLuvHqd9owQ1TljLm0bf5cvuuTJclIlJlFBwxOPigHCZc0ItfDziKFxevY8gds3n34y8yXZaISJVQcMQkkTAuPbUj/7i4D1u+3sXQO17n+bfXZLosEZH9Fjk4zCxhZo3jKKY2OrFjU6Ze3o9jWjfmiicWcO3zi9m+S1ebi8iBK63gMLNHzayxmTUElgIrzOyqeEurPQ5rnMujP+nDT/q35+E5HzBi/BzWfP51pssSEdkn6e5xdHH3LQSPaZ0GtCN4FrikKScrwTWDunDXqJ6s2vAlg8fO4rV3N2a6LBGRyNINjhwzyyEIjknuvpNynrwnFTvz2JZMHnMSLfJyuej+N7n9pXcp1tXmInIASTc4xgPvAw2BmWZ2BKCHUuyjDs0b8dzovgzr3prbX3qPix6Yx6df7ch0WSIiaUkrONx9rLu3dveBHvgA+GbMtdVqDeplc9uI47hx2LHMWbWJwWNnseCjzzNdlohIpdI9Of6L8OS4mdm9ZvYW8K2Ya6v1zIzvn9COpy89ETPj3Lv/j4ffeF9Xm4tIjZbuoaofhSfHzwCaAz8E/hxbVXVMtzZNmHp5P046shnXTlrCFU8sYOsOXW0uIjVTusFh4deBwP3u/k7SOKkCTRrU474fHM+vTu/M5HfWMvSO11m54ctMlyUispd0g2O+mf0vQXBMN7M8oDi+suqmRMK47LROPPSj3mz6agdD75jN1IXrMl2WiMge0g2OHwNXA8e7+1agHsHhKolB/07NmXJZPzofnsfoR9/i+heWsnO3clpEaoZ0e1UVA22A35nZ/wB93X1hrJXVca2aHMQTl5zIRX3zue/1f3PehDms37wt02WJiKTdq+rPwC8IbjeyFLjczG6KszCBetkJ/jCkK2O/14Nl67YwaOwsXl/5SabLEpE6Lt1DVQOB0939Pne/DxgADIqvLEk25LhWTB5zEoc0rMcF985l3KsrdbW5iGRMlLvjNkl6f3BVFyIVO7JFHpNGn8Sgbq24dfoKfvJQIZu37sx0WSJSB6UbHDcBb5vZA2b2IDAfuDG+siSVhvWzGXted/44pCsz39vI4DtmsXjN5kyXJSJ1TLonxx8D+gDPhq8T3f3xOAuT1MyMH/TN54mfnsiu3c7Zd/0fj735oa42F5FqU2FwmFnPkhfQEigCPgJaheMkQ3q2O4Qpl/XjhPaH8ptnF/FfTy3k6x16QJSIxC+7kum3VTDN0f2qMqppo/o88MPe/O3l9xj78nssWbuZu8/vRX6zhpkuTURqsQqDw93r9B1wzYtxq9mPZc9KGFee3pke7ZrwyycW8N2/z+Z/RhzHd7oenunSRKSWSvc6jrNTvE4zsxZxF5hJ+bv+TYedK2HmrbBhGdTg8wjf/EYLXhjTj/bNG/LTh+dz07Rl7NLV5iISgyi3HJkIjApf9wBXAq+bWe18hKw7WxKNg8ccvvInuLMP/L0X/Os6+GgeFNe8P8ptD23AUz87kVEntGP8zNV8f+JcNmzR1eYiUrXSDY5i4Gh3P8fdzwG6ANuBE4Bfx1VcRpmxKas5/845Eq5cDoNugybt4I1xcO+34S9Hw5QrYeXLsKvmPL2vfnYW/2/YsfxlxHEsLPqcQX+fzdzVmzJdlojUIukGR767f5w0vAHo7O6fAuVehWZmA8xshZmtNLOrK2g33MzczAqSxnUzszfMbImZLTKz3HD8jHCZC8JX/IfLGreE4y+GC5+Hq1bC2fdA297wzmPwyNlw65HwzE9g6STY8VXs5aTj7J5teH70STSqn833J85lwsxV6rIrIlWisl5VJWaZ2RTgqXB4OMGzxxsCKZ93amZZwDjgdIJuvPPMbLK7Ly3TLg+4HJibNC4beAS4wN3fMbOm7BlQo9y9MM3aq9ZBh0C3EcFr59ew6lVYPgVWvAiLnoTsXOj4LThqEHQ+Exo2zUiZAEcd3pjJY07iqqcWcuO05cz/4DNuPfc4GufmZKwmETnwpRsco4GzgX4ED3B6EHjGg39hy+t51RtY6e6rAczscWAowU0Sk90A3AL8V9K4M4CF4QOjcPeaeawl5yA4amDw2r0LPnwjCJFlU2DFNLAEHHESHDU4CJImbau9xLzcHO46vyf3zv43N724nCF/n82do3rRpVXjaq9FRGqHdK8cd2A28ArwEjDTKz/u0ZrgYsESReG4UmbWA2jr7lPKzNsZcDObbmZvmdl/l5l+f3iY6lozS/kkQjO7xMwKzaxw48aNlZRaBbKyoX1/OPNm+OViuGQG9LsSvtoI//w13H4MjD8lIz20zIyL+3fg8Uv6sHXHbobd+TpPFX5U+YwiIimk2x13BPAmwSGqEcBcMxte2WwpxpX+tTSzBPBX4Fcp2mUT7N2MCr8OM7PTwmmj3P1YoH/4Stmry90nuHuBuxc0b968klKrmBm06gGnXQuj58KY+fDtP0JWTkZ7aB2ffyhTL+9Pz3aHcNXTC/nNswvZtlNXm4tINOkeqrqG4Ol/GwDMrDnBnsfTFcxTBCQfm2kDrE0azgOOAWaEOw2HA5PNbEg472vu/kn4edOAnsDL7r4GwN2/MLNHCQ6JPZTmemRGsyOh3xXBa8s6WDE1OJz1xjh4/W/Q6PDgUNZRgyC/P2TXi62U5nn1efjHvbntX+9y14xVLFqzmbtG9aLtoQ1i+0wRqV3S7VWVKAmN0KY05p0HdDKz9mZWDzgPmFwy0d03u3szd89393xgDjAkPOk9HehmZg3CE+WnAEvNLNvMmgGYWQ4wGFic5jrUDHv00FqVkR5a2VkJfj3gKO65sIAPNm1l0NhZvLL848pnFBEh/T2Of5rZdOCxcHgkMK2iGdx9l5mNIQiBLOA+d19iZtcDhe4+uYJ5PzOzvxCEjwPT3H1q2ItrehgaWQR7PfekuQ41z0FNMtpD6/QuhzHlsn5c+shb/OiBQkZ/syNXnv4NshIpTxuJiABpBoe7X2Vm5wAnEZy7mODuz6Ux3zTKBIy7X1dO21PLDD9C0CU3edxXQK90aj7gZKiH1hFNG/Lsz/vy+0lLGPfqKhZ89Dl/O68HzRrVr5Lli0jtk+4eB+7+DPBMjLVIiZIeWu37w4A/w7oFQYAsnxL00Prnr6Fl9yBEjh4MzY8KTsjvo9ycLG4e3o1eRxzCtZMWM3jsbMaN6kGvIw6twpUSkdqisudxfGFmW1K8vjCzLdVVZJ1WUQ+tV6u2h9aI49vyzKV9qZedYOT4Odw3+9+62lxE9lLZbdXzqqsQSVNaPbQGBnsj+9BD65jWB/PCZf341ZPvcP2Upcz/8DNuPqcbjeqnvXMqIrWc/hocyEp6aB1/MXz9Obz3v7DsBXjncSi8D+ofDJ2/ExzOOvLbUC+9BzwdfFAOEy7oxfiZq7l1+nKWrdvC3ef3ovNh+j9CRBQctUcV99BKJIxLT+3IcW0P5vLH3mboHa9z09nHclaP1hXOJyK1n4KjNqrCHlp9OzZj6uX9GfPoW1zxxALmf/AZvxt8NPWzs6pxhUSkJqnZz0WV/VfuPbQ+2fMeWq+Vfw+twxrn8uhP+vCT/u15eM4HjBg/hzWff13tqyIiNYOCoy7Zo4fWHLjsrbR7aOVkJbhmUBfuGtWTVRu+ZPDYWbz2bjXcPFJEahwFR13WtGPQO+vilyp4yuEv93jK4ZnHtmTymJNokZfLRfe/ye0vvUtxsbrsitQlOschgQg9tDoc+W2eG92X3z23mNtfeo+3Pvyc20d2z/QaiEg1UXDI3lL20JoanFgPe2g16PgtbvvGQPq2Opbf/nMd3/37bH61uyXfyFqX6epFJGYKDqlYBT20bMU0hluCAe16c8/Grvz1qzM5Kmc9rSYtJpEwshNW+jXLjKxEguwsI2F7TittE7aL1iZBIgHZiQRZCYLPSJ7PUswfLiOhmzmK7BMFh6SvnHtoNVo+lV/uupdf5sKn3gh/+27cwTGKw5cDxeEpNfeSccF4L31vpfOUTKPMcDGJ0vGOsaPMNEqW5//53PKWHyzGcBJBxwEzwILuygZGAjfDLPxMC9qZJTAsnBa0L/1KsBxLJIBweiIYbwkDElgiaL9te3BV/5bxY6rl2xddzQrWr7fnALBlwmUZruTAULK9Ou/YTk69qr1pqYJD9k1JD62SXlqbVrH+zoHU8x007TEU8KBrrxcDjrvjXowXJ78v/s/7kmkEw5ROK5keLM+9GMoM/+d9+HnuOMXh53tS++K96gq6H/9nPnCsdNx/plnpfEnTw7al8eRBLzQrM61sRCbCZZXG19qF1f3dO2AZDmu0vaLYvXsXOSg4pCZq2pFNWcEjeg/97u17TQ7/l5ckS27sB0DX387OcCUHBm2vaEq310Hp3WooCnXHFRGRSBQcIiISiYJDREQiUXCIiEgkCg4REYlEwSEiIpEoOEREJBIFh4iIRKILACvQteXBmS5BRKTG0R6HiIhEouAQEZFIFBwiIhKJgkNERCJRcIiISCQKDhERiUTBISIikSg4REQkEgWHiIhEEmtwmNkAM1thZivN7OoK2g03MzezgqRx3czsDTNbYmaLzCw3HN8rHF5pZmPNTE8kFRGpRrEFh5llAeOAM4EuwPfMrEuKdnnA5cDcpHHZwCPAz9y9K3AqsDOcfBdwCdApfA2Iax1ERGRvce5x9AZWuvtqd98BPA4MTdHuBuAWYFvSuDOAhe7+DoC7b3L33WbWEmjs7m+4uwMPAWfFuA4iIlJGnMHRGvgoabgoHFfKzHoAbd19Spl5OwNuZtPN7C0z+++kZRZVtMykZV9iZoVmVrhx48b9WQ8REUkS591xU5178NKJZgngr8BFKdplA/2A44GtwMtmNh/YUtEy9xjpPgGYAFBQUJCyjYiIRBfnHkcR0DZpuA2wNmk4DzgGmGFm7wN9gMnhCfIi4DV3/8TdtwLTgJ7h+DYVLFNERGIWZ3DMAzqZWXszqwecB0wumejum929mbvnu3s+MAcY4u6FwHSgm5k1CE+UnwIsdfd1wBdm1ifsTXUhMCnGdRARkTJiCw533wWMIQiBZcCT7r7EzK43syGVzPsZ8BeC8FkAvOXuU8PJlwITgZXAKuDFmFZBRERSiPUJgO4+jeAwU/K468ppe2qZ4UcIuuSWbVdIcIhLREQyQFeOi4hIJAoOERGJRMEhIiKRKDhERCQSBYeIiESi4BARkUgUHCIiEomCQ0REIlFwiIhIJAoOERGJRMEhIiKRKDhERCQSBYeIiESi4BARkUhiva36Ae+HUytvIyJSx2iPQ0REIlFwiIhIJAoOERGJRMEhIiKRKDhERCQSBYeIiESi4BARkUgUHCIiEomCQ0REIlFwiIhIJAoOERGJRMEhIiKRKDhERCQSBYeIiESi4BARkUgUHCIiEomCQ0REIlFwiIhIJLEGh5kNMLMVZrbSzK6uoN1wM3MzKwiH883sazNbEL7uTmo7I1xmybQWca6DiIjsKbZnjptZFjAOOB0oAuaZ2WR3X1qmXR5wOTC3zCJWuXv3chY/yt0Lq7pmERGpXJx7HL2Ble6+2t13AI8DQ1O0uwG4BdgWYy0iIlJF4gyO1sBHScNF4bhSZtYDaOvuU1LM397M3jaz18ysf5lp94eHqa41M0v14WZ2iZkVmlnhxo0b92c9REQkSZzBkeoPupdONEsAfwV+laLdOqCdu/cArgQeNbPG4bRR7n4s0D98XZDqw919grsXuHtB8+bN92M1REQkWZzBUQS0TRpuA6xNGs4DjgFmmNn7QB9gspkVuPt2d98E4O7zgVVA53B4Tfj1C+BRgkNiIiJSTWI7OQ7MAzqZWXtgDXAe8P2Sie6+GWhWMmxmM4D/cvdCM2sOfOruu82sA9AJWG1m2UATd//EzHKAwcBLMa6DRND1t7MzXYKIVIPYgsPdd5nZGGA6kAXc5+5LzOx6oNDdJ1cw+8nA9Wa2C9gN/MzdPzWzhsD0MDSyCELjnrjWQSROCtpotL2iiXN7mbtX3uoAV1BQ4IWF6r0rIhKFmc1394Ky43XluIiIRKLgEBGRSBQcIiISiYJDREQiUXCIiEgkCg4REYlEwSEiIpEoOEREJJI6cQGgmW0EPtjH2ZsBn1RhOVVFdUWjuqJRXdHU1rqOcPe97hJbJ4Jjf5hZYaorJzNNdUWjuqJRXdHUtbp0qEpERCJRcIiISCQKjspNyHQB5VBd0aiuaFRXNHWqLp3jEBGRSLTHISIikSg4REQkEgVHGWZ2rpktMbNiMyu3G5uZDTCzFWa20syuroa6DjWzf5nZe+HXQ8ppt9vMFoSvip6yuL/1VLj+ZlbfzJ4Ip881s/y4aolY10VmtjFpG11cDTXdZ2YbzGxxOdPNzMaGNS80s55x15RmXaea2eakbXVdNdXV1sxeNbNl4e/iL1K0qfZtlmZd1b7NzCzXzN40s3fCuv6Yok3V/j66u15JL+Bo4BvADKCgnDZZwCqgA1APeAfoEnNdtwBXh++vBm4up92X1bCNKl1/4OfA3eH784AnakhdFwF3VPPP1MlAT2BxOdMHAi8CBvQB5taQuk4FplTntgo/tyXQM3yfB7yb4vtY7dsszbqqfZuF26BR+D4HmAv0KdOmSn8ftcdRhrsvc/cVlTTrDax099XuvgN4HBgac2lDgQfD9w8CZ8X8eRVJZ/2T630aOM3MrAbUVe3cfSbwaQVNhgIPeWAO0MTMWtaAujLC3de5+1vh+y+AZUDrMs2qfZulWVe1C7fBl+FgTvgq2+upSn8fFRz7pjXwUdJwEfH/AB3m7usg+AEGWpTTLtfMCs1sjpnFFS7prH9pG3ffBWwGmsZUT5S6AM4JD288bWZtY64pHZn4eUrXieEhkBfNrGt1f3h4SKUHwX/RyTK6zSqoCzKwzcwsy8wWABuAf7l7udurKn4fs/d1xgOZmb0EHJ5i0jXuPimdRaQYt9/9miuqK8Ji2rn7WjPrALxiZovcfdX+1lZGOusfyzaqRDqf+QLwmLtvN7OfEfwX9q2Y66pMJrZVOt4iuFfRl2Y2EHge6FRdH25mjYBngCvcfUvZySlmqZZtVkldGdlm7r4b6G5mTYDnzOwYd08+d1Wl26tOBoe7f3s/F1EEJP+n2gZYu5/LrLAuM/vYzFq6+7pwl3xDOctYG35dbWYzCP4rqurgSGf9S9oUmVk2cDDxHxaptC5335Q0eA9wc8w1pSOWn6f9lfxH0d2nmdmdZtbM3WO/mZ+Z5RD8cf6Huz+boklGtllldWVym4Wf+Yk+klIAAANhSURBVHn4ez8ASA6OKv191KGqfTMP6GRm7c2sHsHJpth6MIUmAz8I3/8A2GvPyMwOMbP64ftmwEnA0hhqSWf9k+sdDrzi4Zm5GFVaV5nj4EMIjlNn2mTgwrCnUB9gc8lhyUwys8NLjoObWW+CvxebKp6rSj7XgHuBZe7+l3KaVfs2S6euTGwzM2se7mlgZgcB3waWl2lWtb+P1Xn2/0B4AcMI0nk78DEwPRzfCpiW1G4gQa+KVQSHuOKuqynwMvBe+PXQcHwBMDF83xdYRNCbaBHw4xjr2Wv9geuBIeH7XOApYCXwJtChmr5/ldV1E7Ak3EavAkdVQ02PAeuAneHP1o+BnwE/C6cbMC6seRHl9ObLQF1jkrbVHKBvNdXVj+AwykJgQfgamOltlmZd1b7NgG7A22Fdi4HrUvzcV+nvo245IiIikehQlYiIRKLgEBGRSBQcIiISiYJDREQiUXCIiEgkCg6RKmBmX1beqsL5nw6v9sfMGpnZeDNbFd7tdKaZnWBm9cL3dfLCXak5FBwiGRbezyjL3VeHoyYSXNXbyd27EtzRt5kHN258GRiZkUJFQgoOkSoUXsl8q5ktNrNFZjYyHJ8Ibz+xxMymmNk0MxsezjaK8E4AZtYROAH4nbsXQ3D7GHefGrZ9PmwvkjHa5RWpWmcD3YHjgGbAPDObSXD7l3zgWII7Gy8D7gvnOYngKm6ArsACD25al8pi4PhYKhdJk/Y4RKpWP4K77+5294+B1wj+0PcDnnL3YndfT3C7kxItgY3pLDwMlB1mllfFdYukTcEhUrXKezhORQ/N+ZrgXkIQ3OfoODOr6HezPrBtH2oTqRIKDpGqNRMYGT5YpznB41nfBGYTPEAqYWaHETxitMQy4EgAD56dUgj8Mekuq53MbGj4vimw0d13VtcKiZSl4BCpWs8R3KX0HeAV4L/DQ1PPENyBdjEwnuDJcZvDeaayZ5BcTPBAr5VmtojguSElz5r4JjAt3lUQqZjujitSTcyskQdPhmtKsBdykruvD5+h8Go4XN5J8ZJlPAv8xt1XVEPJIimpV5VI9ZkSPnCnHnBDuCeCu39tZr8neC70h+XNHD6c6nmFhmSa9jhERCQSneMQEZFIFBwiIhKJgkNERCJRcIiISCQKDhERieT/A4uJrZkrZeZrAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "train_means = grid.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "# plot results\n",
    "n_Cs = len(Cs)\n",
    "number_penaltys = len(penaltys)\n",
    "\n",
    "train_scores = np.array(train_means).reshape(n_Cs,number_penaltys)\n",
    "train_stds = np.array(train_stds).reshape(n_Cs,number_penaltys)\n",
    "\n",
    "\n",
    "x_axis = np.log10(Cs)\n",
    "for i, value in enumerate(penaltys):\n",
    "    #pyplot.plot(log(Cs), test_scores[i], label= 'penalty:'   + str(value))\n",
    "    plt.errorbar(x_axis, -train_scores[:,i], yerr=train_stds[:,i] ,label = penaltys[i] +' Train')\n",
    "    \n",
    "plt.legend()\n",
    "plt.xlabel( 'log(C)' )                                                                                                      \n",
    "plt.ylabel( 'logloss' )\n",
    "plt.savefig('LogisticGridSearchCV_C.png' )\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle as cPickle\n",
    "\n",
    "cPickle.dump(grid.best_estimator_, open(\"logloss_l1.pkl\", 'wb'))\n",
    "cPickle.dump(grid_accuracy.best_estimator_, open(\"accuracy_l2.pkl\", 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
